# Run on R 3.3.0 GUI 1.68 Mavericks build (7202)
# June 3, 2016

########################################################
# FIGURE 2. Threeway Interaction. Outliers dropped.
########################################################


library(foreign)
library(car)
library(arm)
library(lmtest)
library(sandwich)
library(ggplot2)
library(gvlma)
library(MASS)
library(leaps)
library(survival)
library(memisc)
library(msm)
library(plm)	
	
##### Set Path 
# setwd()

fulldata <- read.dta("industry_country_no_outliers.dta")

attach(fulldata)
summary(fulldata)

country <- fulldata$country
wtaxcomp <- fulldata$wtaxcomp
tariff <- fulldata$tariff
digit2 <- fulldata$digit2
region <- fulldata$region
avgcompetitors <- fulldata$avgcompetitors
lnavgexp <- fulldata$lnavgexport
lntotcap <- fulldata$lntotcap
obsolete <- fulldata$obsolete
lntariff <- fulldata$lntariff
lnavgexp <- fulldata$lnavgexport
lnavgage <- fulldata$lnavgage
lntotlabor <- fulldata$lntotlabor
gov_owner_share <-fulldata$gov_owner_share
lowfc <- fulldata$lowfc
idc <- fulldata$idc
mining <- fulldata$mining
free_media <- fulldata$free_media
ln_tot_pop <- fulldata$ln_tot_pop	
	
res <- lm(formula = wtaxcomp ~ factor(region) + lnavgage + lnavgexp + avgcompetitors + lntotlabor + free_media + ln_tot_pop + gov_owner_share + lowfc + obsolete + tariff + tariff:obsolete + tariff:lowfc + obsolete:lowfc + tariff:obsolete:lowfc + factor(sector),  data = fulldata)

summary(res)

# Output with clustered errors

	cl.coeff  <- function(dat,fm, cluster){
           attach(dat, warn.conflicts = F)
           library(sandwich)
           M <- length(unique(cluster))   
           N <- length(cluster)  	        
           K <- fm$rank		             
           dfc <- (M/(M-1))*((N-1)/(N-K))  
           uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
           vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
           coeftest(fm, vcovCL)
	}
	
	# apply the 'cl' function by choosing a variable to cluster on.
	# cluster at country.
	clustercoef <- cl.coeff(fulldata,res,idc)

	clustercoef
	
# Variance covariance matrix with clustered errors

	cl.varcovar  <- function(dat,fm, cluster){
           attach(dat, warn.conflicts = F)
           library(sandwich)
           M <- length(unique(cluster))   
           N <- length(cluster)  	        
           K <- fm$rank		             
           dfc <- (M/(M-1))*((N-1)/(N-K))  
           uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
           vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
           coeftest(fm, vcovCL)
		# Show the variance-covariance-matrix 
		return(vcovCL) 
	}

# Store the covariance matrix
clustercovar <- cl.varcovar(fulldata,res,idc)

# Coefficients
est.obs <- clustercoef[11,1]
est.obs.tariff <- clustercoef[20,1]
est.obs.lowfc <- clustercoef[22,1]
est.obs.lowfc.tariff <- clustercoef[23,1]

# Simulation values: from 0 to 30 tariff protection, one by one. 
at.tariff <- seq(0,30,1)

x <- obsolete
z <- tariff
w <- lowfc

est.x <- est.obs
est.xz <- est.obs.tariff
est.xw <- est.obs.lowfc
est.xzw <- est.obs.lowfc.tariff

# Vars and Covars 
var.x <- clustercovar[11,11]
var.x.z <- clustercovar[20,20]
var.x.w <- clustercovar[22,22]
var.x.z.w <- clustercovar[23,23]

covar.x_x.z <- clustercovar[20,11]
covar.x_x.w <- clustercovar[22,11]
covar.x_x.w.z <- clustercovar[23,11]

covar.x.z_x.w <- clustercovar[20,22]
covar.x.z_x.w.z <- clustercovar[20,23]
covar.x.w_x.w.z <- clustercovar[22,23]


####################################
# FIGURE 2 - LEFT
####################################

# COMPARE LOWER TO HIGHER QUARTILE OF FISCAL CAPACITY

# LOWER QUARTILE
at.lowfc.1 <- -.24
		
# Predicted Expected Value 
slopes.1 <- est.x + est.xz*at.tariff + est.xw*at.lowfc.1 + est.xzw*at.tariff*at.lowfc.1

SEs.1 <- rep(NA, length(at.tariff))
 	for (i in 1: length(at.tariff)) {
			j <-at.tariff[i]
		SEs.1[i] = (var.x + (j^2)*(var.x.z) + (at.lowfc.1^2)*(var.x.w) + (j^2)*(at.lowfc.1^2)*(var.x.z.w) + 2*j*covar.x_x.z + 2*at.lowfc.1*covar.x_x.w + 2*j*at.lowfc.1*covar.x_x.w.z + 2*j*at.lowfc.1*covar.x.z_x.w + 2*(j^2)*at.lowfc.1*covar.x.z_x.w.z + 2*j*(at.lowfc.1^2)*covar.x.w_x.w.z)^(1/2)
 		}

upper.1 <- slopes.1 + 1.6449*SEs.1
lower.1 <- slopes.1 - 1.6449*SEs.1

cbind(at.tariff, slopes.1, upper.1, lower.1)

# HIGHER QUARTILE
at.lowfc.2 <- -1.08

# Predicted Expected Value
slopes.2 <- est.x + est.xz*at.tariff + est.xw*at.lowfc.2 + est.xzw*at.tariff*at.lowfc.2

SEs.2 <- rep(NA, length(at.tariff))

	for (i in 1: length(at.tariff)) {
		j <-at.tariff[i]
		SEs.2[i] = (var.x + (j^2)*(var.x.z) + (at.lowfc.2^2)*(var.x.w) + (j^2)*(at.lowfc.2^2)*(var.x.z.w) + 2*j*covar.x_x.z + 2*at.lowfc.2*covar.x_x.w + 2*j*at.lowfc.2*covar.x_x.w.z + 2*j*at.lowfc.2*covar.x.z_x.w + 2*(j^2)*at.lowfc.2*covar.x.z_x.w.z + 2*j*(at.lowfc.2^2)*covar.x.w_x.w.z)^(1/2)
		}

# 90 CI, Two Tailed
upper.2 <- slopes.2 + 1.6449*SEs.2
lower.2 <- slopes.2 - 1.6449*SEs.2

cbind(at.tariff, slopes.2, upper.2, lower.2)


# pdf(".../Figure_2_left.pdf", width=10,height=7)	
plot(at.tariff, slopes.2, type = "l", lty = 1, ylim = c(-.5, 1), xlab = "Tariff Protection", ylab = "Marginal Effect of Obsolescence on Tax Compliance", lwd=2.5, col=rgb(170, 170, 170, 250, maxColorValue=255), frame.plot=FALSE,cex.lab = 1.3)
	lines(at.tariff, upper.2, col=rgb(170, 170, 170, 200, maxColorValue=255), lty=2, lwd=1)
	lines(at.tariff, lower.2, col=rgb(170, 170, 170, 200, maxColorValue=255), lty=2, lwd=1)
	# Zero-Line
	lines(at.tariff, rep(0, length(at.tariff)), type = "l", col = "black")
	# Grid for background
	grid(col = "gray80")
	# Makes Graph nicer
	frame.y1 <- c(lower.2,rev(upper.2))
	frame.x1 <- c(at.tariff, rev(at.tariff))
	polygon(frame.x1, frame.y1, density=-1,col=rgb(170, 170, 170, 100, maxColorValue=255), border=NA)
	# Need to re-draw the lines
	lines(at.tariff, lower.2, col=rgb(170, 170, 170, 100, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, upper.2, col=rgb(170, 170, 170, 100, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, slopes.2, type="l", lwd=2.0, col=rgb(170, 170, 170, 200, maxColorValue=255))
	# sample information
	rug(tariff, ticksize=0.02, lwd=0.3, col="black")
	# Add Second Prediction
	frame.y2 <- c(lower.1,rev(upper.1))
	frame.x2 <- c(at.tariff, rev(at.tariff))
	polygon(frame.x2, frame.y2, density=-1,col=rgb(96, 96, 96, 125, maxColorValue=255), border=NA)
	# Need to re-draw the lines
	points(at.tariff, slopes.1, type = "l", lty = 1, ylim = c(-.5, 1), col=rgb(96, 96, 96,  250, maxColorValue=255), lwd=2)
	lines(at.tariff, upper.1, type = "l", col=rgb(96, 96, 96, 250, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, lower.1, type = "l", col=rgb(96, 96, 96, 250, maxColorValue=255), lty=2, lwd=1.9)
	legend(.1,.99,col=c(rgb(170, 170, 170, 100, maxColorValue=255),rgb(60, 60, 60, 100, maxColorValue=255)), c("Low Fiscal Capacity (lower quartile)", "High Fiscal Capacity (higher quartile)"), bty="n", cex=1, fill=c(rgb(60, 60, 60, 250, maxColorValue=255),rgb(170, 170, 170, 250, maxColorValue=255)), border=c(rgb(60, 60, 60, 250, maxColorValue=255),rgb(170, 170, 170, 250, maxColorValue=255)))
#dev.off()




####################################
# FIGURE 2 - RIGHT
####################################

# COMPARE LOWER TO HIGHER DECILE OF FISCAL CAPACITY

# LOWEST DECILE
at.lowfc.1 <- -.15
		
# Predicted Expected Value
slopes.1 <- est.x + est.xz*at.tariff + est.xw*at.lowfc.1 + est.xzw*at.tariff*at.lowfc.1

SEs.1 <- rep(NA, length(at.tariff))

	for (i in 1: length(at.tariff)) {
			j <-at.tariff[i]
		SEs.1[i] = (var.x + (j^2)*(var.x.z) + (at.lowfc.1^2)*(var.x.w) + (j^2)*(at.lowfc.1^2)*(var.x.z.w) + 2*j*covar.x_x.z + 2*at.lowfc.1*covar.x_x.w + 2*j*at.lowfc.1*covar.x_x.w.z + 2*j*at.lowfc.1*covar.x.z_x.w + 2*(j^2)*at.lowfc.1*covar.x.z_x.w.z + 2*j*(at.lowfc.1^2)*covar.x.w_x.w.z)^(1/2)
 		}

upper.1 <- slopes.1 + 1.6449*SEs.1
lower.1 <- slopes.1 - 1.6449*SEs.1

cbind(at.tariff, slopes.1, upper.1, lower.1)

# HIGHEST DECILE
at.lowfc.2 <- -1.36
		
# Predicted Expected Value
slopes.2 <- est.x + est.xz*at.tariff + est.xw*at.lowfc.2 + est.xzw*at.tariff*at.lowfc.2

SEs.2 <- rep(NA, length(at.tariff))

		for (i in 1: length(at.tariff)) {
		j <-at.tariff[i]
		SEs.2[i] = (var.x + (j^2)*(var.x.z) + (at.lowfc.2^2)*(var.x.w) + (j^2)*(at.lowfc.2^2)*(var.x.z.w) + 2*j*covar.x_x.z + 2*at.lowfc.2*covar.x_x.w + 2*j*at.lowfc.2*covar.x_x.w.z + 2*j*at.lowfc.2*covar.x.z_x.w + 2*(j^2)*at.lowfc.2*covar.x.z_x.w.z + 2*j*(at.lowfc.2^2)*covar.x.w_x.w.z)^(1/2)
		}

# 90 CI, Two Tailed
upper.2 <- slopes.2 + 1.6449*SEs.2
lower.2 <- slopes.2 - 1.6449*SEs.2

cbind(at.tariff, slopes.2, upper.2, lower.2)

# pdf(".../Figure_2_right.pdf", width=10,height=7)	
plot(at.tariff, slopes.2, type = "l", lty = 1, ylim = c(-.5, 1), xlab = "Tariff Protection", ylab = "Marginal Effect of Obsolescence on Tax Compliance", lwd=2.5, col=rgb(170, 170, 170, 250, maxColorValue=255), frame.plot=FALSE, cex.lab = 1.3)
	lines(at.tariff, upper.2, col=rgb(170, 170, 170, 200, maxColorValue=255), lty=2, lwd=1)
	lines(at.tariff, lower.2, col=rgb(170, 170, 170, 200, maxColorValue=255), lty=2, lwd=1)
	# Zero-Line
	lines(at.tariff, rep(0, length(at.tariff)), type = "l", col = "black")
	# Grid for background
	grid(col = "gray80")
	# Makes Graph nicer
	frame.y1 <- c(lower.2,rev(upper.2))
	frame.x1 <- c(at.tariff, rev(at.tariff))
	polygon(frame.x1, frame.y1, density=-1,col=rgb(170, 170, 170, 100, maxColorValue=255), border=NA)
	# Need to re-draw the lines
	lines(at.tariff, lower.2, col=rgb(170, 170, 170, 100, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, upper.2, col=rgb(170, 170, 170, 100, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, slopes.2, type="l", lwd=2.0, col=rgb(170, 170, 170, 200, maxColorValue=255))
	# sample information
	rug(tariff, ticksize=0.02, lwd=0.3, col="black")
	# Add Second Prediction
	frame.y2 <- c(lower.1,rev(upper.1))
	frame.x2 <- c(at.tariff, rev(at.tariff))
	polygon(frame.x2, frame.y2, density=-1,col=rgb(96, 96, 96, 125, maxColorValue=255), border=NA)
	# Need to re-draw the lines
	points(at.tariff, slopes.1, type = "l", lty = 1, ylim = c(-.5, 1), col=rgb(96, 96, 96,  250, maxColorValue=255), lwd=2)
	lines(at.tariff, upper.1, type = "l", col=rgb(96, 96, 96, 250, maxColorValue=255), lty=2, lwd=1.9)
	lines(at.tariff, lower.1, type = "l", col=rgb(96, 96, 96, 250, maxColorValue=255), lty=2, lwd=1.9)
	legend(.1,.99,col=c(rgb(170, 170, 170, 100, maxColorValue=255),rgb(60, 60, 60, 100, maxColorValue=255)), c("Low Fiscal Capacity (1st decile)", "High Fiscal Capacity (9th decile)"), bty="n", cex=1, fill=c(rgb(60, 60, 60, 250, maxColorValue=255),rgb(170, 170, 170, 250, maxColorValue=255)), border=c(rgb(60, 60, 60, 250, maxColorValue=255),rgb(170, 170, 170, 250, maxColorValue=255)))
#dev.off()


